Setup

Install and Attach

knitr::opts_chunk$set(echo = TRUE)
libinstall <- function(pkg) {
    if(!require(pkg, character.only = T))install.packages(pkg)
    library(pkg, character.only = T)
}

libinstall("tidyverse")
## Loading required package: tidyverse
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.5     v dplyr   1.0.3
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
libinstall("glue")
## Loading required package: glue
## 
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
## 
##     collapse
libinstall("readr")
libinstall("plotly")
## Loading required package: plotly
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
libinstall("readr")
libinstall("readxl")
## Loading required package: readxl
libinstall("lubridate")
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
libinstall("curl")
## Loading required package: curl
## 
## Attaching package: 'curl'
## The following object is masked from 'package:readr':
## 
##     parse_date

Import Data

Import cpsaat Data

# Download cpsaat data
curl_download("https://www.bls.gov/cps/cpsaat11.xlsx", destfile = "cpsaat11.xlsx")

# Import cpsaat
cpsaat11 <- read_excel(
    "cpsaat11.xlsx",
    col_names = c(
        "Occupation",
        "Total",
        "Women",
        "White",
        "Black/African American",
        "Asian",
        "Hispanic/Latino"
    ),
    na = "–",
    col_types = c(
        Occupation="text",
        Total="numeric",
        "Women"="numeric",
        "White"="numeric",
        "Black/African American"="numeric",
        "Asian"="numeric",
        "Hispanic/Latino"="numeric"
    ),
    skip = 7
)%>%
    drop_na(Occupation)

Import EPI Data

Labor_force_participation <- read_csv("EPI Data Library - Labor-force participation rate.csv", col_types = c(.default="n", Date="c"))

Medianaverage_hourly_wages <- read_csv("EPI Data Library - Medianaverage hourly wages.csv", col_types = c(.default="n"))

Minimum_wage <- read_csv("EPI Data Library - Minimum wage.csv", col_types = c(.default="n"))

Clean Data

Clean cpsaat11

cpsaat11
## # A tibble: 596 x 7
##    Occupation         Total Women White `Black/African A~ Asian `Hispanic/Latin~
##    <chr>              <dbl> <dbl> <dbl>             <dbl> <dbl>            <dbl>
##  1 Total, 16 years ~ 147795  46.8  78                12.1   6.4             17.6
##  2 Management, prof~  63644  51.7  78.7               9.7   8.6             10.4
##  3 Management, busi~  27143  44.6  81.7               8.8   6.7             10.9
##  4 Management occup~  18564  40.4  83.4               8     5.8             10.7
##  5 Chief executives    1669  29.3  88                 4.3   5.4              7.4
##  6 General and oper~   1057  30.5  84.4               7.1   4.5             12.4
##  7 Legislators           25  NA    NA                NA    NA               NA  
##  8 Advertising and ~     56  52.1  80.5              14.7   3.9              3.5
##  9 Marketing manage~    554  60.7  84.1               5.5   7.6              9.9
## 10 Sales managers       521  30.9  87.6               5.8   4.2              7.6
## # ... with 586 more rows

Looks fine.

Clean Labor_force_participation

Labor_force_participation
## # A tibble: 513 x 13
##    Date    All Women   Men Black `Black Women` `Black Men` Hispanic
##    <chr> <dbl> <dbl> <dbl> <dbl>         <dbl>       <dbl>    <dbl>
##  1 Sep-~  62.2  56.6  68.1  61.2          59.6        63.2     66  
##  2 Aug-~  62.3  56.8  68.2  61.5          59.8        63.5     66.2
##  3 Jul-~  62.4  56.9  68.3  61.6          59.9        63.7     66.3
##  4 Jun-~  62.6  57    68.5  61.8          60.1        63.9     66.5
##  5 May-~  62.7  57.1  68.7  62            60.3        64       66.6
##  6 Apr-~  62.9  57.3  68.9  62.3          60.5        64.3     66.7
##  7 Mar-~  63.1  57.5  69.1  62.6          60.8        64.7     67  
##  8 Feb-~  63.1  57.5  69.2  62.6          60.9        64.7     67  
##  9 Jan-~  63.1  57.5  69.2  62.6          60.9        64.8     66.8
## 10 Dec-~  63.1  57.4  69.2  62.7          60.8        64.8     66.8
## # ... with 503 more rows, and 5 more variables: `Hispanic Women` <dbl>,
## #   `Hispanic Men` <dbl>, White <dbl>, `White Women` <dbl>, `White Men` <dbl>
Labor_force_participation=Labor_force_participation%>%
    mutate(Date=lubridate::my(Date))

Participation=Labor_force_participation%>%
    pivot_longer(-c(Date), names_to = "Race", values_to = "Participation", values_drop_na = T)%>%
    separate(Race, into = c("Race", "Gender"))
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 3012 rows [1, 2,
## 3, 4, 7, 10, 13, 14, 15, 16, 19, 22, 25, 26, 27, 28, 31, 34, 37, 38, ...].
Participation=Participation%>%
    filter(grepl("Women|Men", Race, ignore.case = T))%>%
    mutate(
        Gender=Race,
        Race=NA_character_
    )%>%
    union(
        Participation%>%
            filter(!grepl("Women|Men", Race, ignore.case = T))
    )
rm(Labor_force_participation)

Clean Medianaverage_hourly_wages

Medianaverage_hourly_wages
## # A tibble: 47 x 25
##     Date Median Average `Men Median` `Men Average` `Women Median`
##    <dbl>  <dbl>   <dbl>        <dbl>         <dbl>          <dbl>
##  1  2019   19.3    26.5         21            29.4           17.8
##  2  2018   19.1    26.1         20.5          29             17.2
##  3  2017   18.8    25.5         20.8          28.2           17.2
##  4  2016   18.7    25.3         20.5          28.1           17.0
##  5  2015   18.3    24.8         20.3          27.5           16.7
##  6  2014   18.0    23.9         19.6          26.3           16.3
##  7  2013   18.0    24.1         19.8          26.6           16.5
##  8  2012   17.9    24.0         20.0          26.6           16.6
##  9  2011   18.2    23.8         19.9          26.2           16.9
## 10  2010   18.6    24.3         20.4          27.0           17.0
## # ... with 37 more rows, and 19 more variables: `Women Average` <dbl>, `White
## #   Median` <dbl>, `White Average` <dbl>, `Black Median` <dbl>, `Black
## #   Average` <dbl>, `Hispanic Median` <dbl>, `Hispanic Average` <dbl>, `White
## #   Men Median` <dbl>, `White Men Average` <dbl>, `Black Men Median` <dbl>,
## #   `Black Men Average` <dbl>, `Hispanic Men Median` <dbl>, `Hispanic Men
## #   Average` <dbl>, `White Women Median` <dbl>, `White Women Average` <dbl>,
## #   `Black Women Median` <dbl>, `Black Women Average` <dbl>, `Hispanic Women
## #   Median` <dbl>, `Hispanic Women Average` <dbl>
Wages=Medianaverage_hourly_wages%>%
    pivot_longer(-Date, names_to = "Race", values_to = "Wage", values_drop_na = T)%>%
    separate(Race, into = c("Race", "Gender", "Summary"), fill = "left")

# Race is in the wrong location sometimes
Wages=Wages%>%
    filter(!grepl("Women|Men", Gender, ignore.case = T))%>%
    mutate(
        Race=Gender,
        Gender=NA_character_
    )%>%
    union(
        Wages%>%
            filter(grepl("Women|Men", Gender, ignore.case = T))
    )
# No need to keep the Average and Median split up
Wages=Wages%>%
    pivot_wider(names_from = Summary, values_from = Wage)
rm(Medianaverage_hourly_wages)

Clean Minimum_wage

#adjust for inflation to get to common 2019
Minimum_wage=Minimum_wage%>%
    mutate(
        `Min2019`=priceR::adjust_for_inflation(
            `Federal minimum wage (real 2018 dollars)`,
            2018,
            "US",
            2019
        )
    )
## Retrieving countries data
## Generating URL to request all 297 results
## Retrieving inflation data for US 
## Generating URL to request all 61 results
Minimum_wage=Minimum_wage%>%
    rename(MinCur=`Federal minimum wage (nominal dollars)`)%>%
    select(Min2019, MinCur, Date)

Graph

Average and Medium Wage over Time by Race and Gender

g=Wages%>%
    ggplot(aes(col=Race, x=Date))+
    geom_line(aes(y=Average))+
    geom_line(aes(y=Min2019, col=NULL), data=Minimum_wage, size=2)+
    facet_wrap(~Gender)
ggplotly(g)
g=Wages%>%
    ggplot(aes(col=Race, x=Date))+
    geom_line(aes(y=Median))+
    geom_line(aes(y=Min2019, col=NULL), data=Minimum_wage, size=2)+
    facet_wrap(~Gender)
ggplotly(g)

Scatter Plot over Time

g=Wages%>%
    ggplot()+
    geom_point(aes(x=Median, y=Average, col=Race, shape=Gender, frame=Date))+
    ggtitle("Median vs Average Wage per Race and Gender over Time")
## Warning: Ignoring unknown aesthetics: frame
ggplotly(g)